
# TFS checked in - 02/15/2016 - jbucci 
###########################################################################
###########################################################################
# data exploration and QC
###########################################################################
###########################################################################


# Build the heatmap panels consumed by heatmap.plus.multipanel.
#
# For every element of `data.list` (samples in rows, probes in columns) this
# optionally restricts the probes to `keep.probes`, relabels the probes with
# probe.annot$probeLabel.Type, transposes to probes x samples and abbreviates
# the sample names.  Panels with no probes are dropped, so the returned lists
# never contain NULL gaps.  Each panel keeps the grey shade of its ORIGINAL
# position in `data.list`, so a given analyte gets the same shade in every plot.
#
# Returns list(X = <named list of matrices>, rowcolors = <list of 1-column
# colour matrices whose rownames match the rows of the corresponding panel>).
.runQC.buildPanels <- function(data.list, probe.annot, keep.probes = NULL)
{
  # at least 5 shades (the historical palette); more only if there are >5 panels
  shades <- gray.colors(max(5, length(data.list)))
  built <- lapply(seq_along(data.list), function(l) {
    panel <- data.list[[l]]
    if (is.null(panel)) {
      return(NULL)
    }
    if (!is.null(keep.probes)) {
      panel <- panel[, colnames(panel) %in% keep.probes, drop = FALSE]
    }
    # a panel without probes cannot be drawn:
    if (ncol(panel) == 0) {
      return(NULL)
    }
    colnames(panel) <- probe.annot$probeLabel.Type[match(colnames(panel), rownames(probe.annot))]
    panel <- t(panel)
    # shorten the sample names once, so every plot type shows the same labels:
    colnames(panel) <- strtrim(abbreviate(toupper(colnames(panel))), width = 30)
    rowcolor <- matrix(rep(shades[l], nrow(panel)))
    rownames(rowcolor) <- rownames(panel)
    list(X = panel, rowcolor = rowcolor)
  })
  names(built) <- names(data.list)
  built <- built[!vapply(built, is.null, logical(1))]
  list(X = lapply(built, function(b) b$X),
       rowcolors = unname(lapply(built, function(b) b$rowcolor)))
}

# Data exploration and QC ("Overview") module.
#
# Writes detection-call tables, raw and normalized heatmaps, mean-vs-variance
# plots, covariate summaries, per-gene-set heatmaps/PCA, PCA outlier tables and
# covariate p-value histograms.  Progress is echoed to the console, appended to
# `log` and appended to <path.inc>//status.js.
#
# Arguments (as used by the code below):
#   normalized          samples x probes matrix of normalized data.
#   rawdata             samples x probes raw data used for detection calls.
#   thresholding.value  named list, one entry per analyte type: a numeric
#                       threshold, or "AUTO" for 2 x median of that analyte's
#                       negative controls.
#   probe.annot         probe annotation; rownames are probe IDs, with columns
#                       Control.Type, Analyte.Type and Probe.Label.
#   rawDataAll.list     list of raw samples x probes matrices (one per panel).
#   normalizedData.list list of normalized samples x probes matrices.
#   HKnormalized        normalized housekeeper columns (may be NULL).
#   HKs                 probe IDs of the housekeepers used in normalization.
#   covariates          samples x covariates table.
#   covariates.type     "continuous"/"categorical", one per covariate column.
#   prb.sets            probes x gene-sets membership matrix (1 = member), or NULL.
#   annotcols           samples x covariates matrix of colours.
#   annotcols2          list of colours per covariate, used for bar/box plots.
#   codecols            colours: [1] endogenous, [2] unused HK, [3] used HK.
#   path.to.QC.results  output directory for plots and tables.
#   log                 path of the log file to append to.
#   plottypearg         plot types handed one at a time to drawplot().
#   path.results        unused here; kept for interface compatibility.
#   path.inc            directory holding status.js.
#   path.to.csvs        output directory for the covariates csv.
#
# Relies on drawplot(), heatmap.plus.multipanel(), heatmap.plus.modified() and
# pairs.adaptive.cex() being defined elsewhere; drawplot() is assumed to open
# the graphics device that the matching dev.off() closes.
runQC <- function(normalized,rawdata,thresholding.value,probe.annot,rawDataAll.list,normalizedData.list,HKnormalized,HKs,covariates,covariates.type,prb.sets=NULL,annotcols,annotcols2,
                  codecols,path.to.QC.results,log,plottypearg,path.results,path.inc,path.to.csvs)
{
  status.file <- paste0(path.inc,"//status.js")

  print("Starting Overview")
  cat("LOG:Starting Overview",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Starting Overview</p>');", file=status.file,append=TRUE)

  ### detected/undetected calls, made analyte by analyte:
  detected <- rawdata*NA
  for(an in names(thresholding.value))
  {
    threshold.spec <- thresholding.value[[an]]
    if(is.numeric(threshold.spec))
    {
      # an explicit threshold applies to every sample:
      thresh <- rep(threshold.spec,nrow(rawdata))
    } else if(length(threshold.spec)==1 && !is.na(threshold.spec) && threshold.spec=="AUTO")
    {
      # AUTO: per-sample threshold at double the median of the neg controls
      neg.probes <- rownames(probe.annot)[(probe.annot$Control.Type=="Negative")&(probe.annot$Analyte.Type==an)]
      thresh <- apply(rawdata[,neg.probes,drop=FALSE],1,median)*2
    } else
    {
      # previously a threshold from the preceding analyte was silently reused
      stop("thresholding.value for analyte '",an,"' must be numeric or \"AUTO\"",call.=FALSE)
    }
    # which probes are this analyte:
    tempprobes <- rownames(probe.annot)[probe.annot$Analyte.Type==an]
    if(length(tempprobes)==0)
    {
      next
    }
    # make the detection call:
    detected[,tempprobes] <- sweep(rawdata[,tempprobes,drop=FALSE],1,thresh,">")
  }
  # give the matrix informative names (first row holds the analyte type):
  detected <- rbind(probe.annot[colnames(detected),"Analyte.Type"],detected)
  colnames(detected) <- probe.annot[colnames(detected),"Probe.Label"]
  write.csv(detected,file=paste0(path.to.QC.results,"//above background detection call.csv"))


  ## heatmap of the raw data to give a sense of which genes and samples have good data:

  # probes are displayed as "<probe label> - <analyte type>" instead of accession numbers
  probe.annot$probeLabel.Type <- paste(probe.annot$Probe.Label, probe.annot$Analyte.Type, sep = " - ")

  has.covariates <- length(covariates)>0
  # NOTE(review): the raw-data heatmap below needs rowcols, i.e. it only works
  # when covariates are supplied (same as before).
  if(has.covariates)
  {
    rowcols <- annotcols
  }

  raw.panels <- .runQC.buildPanels(rawDataAll.list, probe.annot)

  linearbreaks <- c(25,50,100,500,10000000)
  breaks <- c(0,linearbreaks)
  raw.hmcols <- c("red","orange","gold","grey","cornflowerblue")

  for(r in seq_along(plottypearg)){
    plottype <- plottypearg[r]
    drawplot(filename=paste0(path.to.QC.results,"//raw data heatmap"),plottype,width=2,height=2,heatmapres=TRUE)
    par(oma=c(1,4,1,1))
    par(mar=c(10,8,2,7))
    heatmap.plus.multipanel(Xlist = raw.panels$X, rowcolorlist = raw.panels$rowcolors, ColSideColors = as.matrix(rowcols), scale="n", breaks=breaks, margins=c(12,12), col=raw.hmcols, cexCol = 0.85, cexRow = 0.75)
    dev.off()
  }

  # colour key for the raw data heatmap:
  for(r in seq_along(plottypearg)){
    plottype <- plottypearg[r]
    drawplot(filename=paste0(path.to.QC.results,"//raw data heatmap - color key"),plottype,width=0.5,height=1.25)
    par(mar=c(0,0,0,0))
    frame(); legend("center",lty=1,lwd=12,col = raw.hmcols,legend = c("< 25","< 50","< 100","< 500",">= 500"))
    dev.off()
  }

  # table of the fraction of samples per count range, for every gene.
  # unname() keeps cbind from prefixing column names with the list names.
  raw <- do.call(cbind,unname(rawDataAll.list))
  detect.tab <- cbind(colMeans(raw<25),colMeans((raw>=25)&(raw<50)),colMeans((raw>=50)&(raw<100)),colMeans((raw>=100)))
  colnames(detect.tab) <- c("< 25","25 - 50","50 - 100","> 100")
  detect.tab <- detect.tab[order(detect.tab[,1],decreasing=TRUE),,drop=FALSE]
  write.csv(detect.tab,file = paste0(path.to.QC.results,"//detected rates for all genes.csv"))

  ## now perform the rest of QC on the normalized data:
  alldata <- cbind(normalized,HKnormalized)

  # remember the probe IDs: after relabelling, the column names can no longer
  # be compared with HKs / colnames(HKnormalized)
  probe.ids <- colnames(alldata)
  row.col.Match <- match(probe.ids, rownames(probe.annot))
  colnames(alldata) <- probe.annot$probeLabel.Type[row.col.Match]

  genecols <- rep(codecols[1],ncol(normalized))
  if(!is.null(HKnormalized))
  {
    genecols <- c(genecols,rep(codecols[2],ncol(HKnormalized)))
  }
  genecols[which(rownames(probe.annot)[row.col.Match] %in% HKs)] <- codecols[3]
  # housekeeper columns get a filled symbol (matches the legend below)
  gene.pch <- 1+15*(probe.ids %in% colnames(HKnormalized))

  # plot mean vs. variance in normalized data, color points by type: endog, HK (selected), HK (unselected)
  means <- apply(alldata,2,mean)
  vars <- apply(alldata,2,var)
  legend.order <- codecols[c(1,3,2)]
  legend.keep <- legend.order %in% genecols
  # label (up to) the 20 most variable genes:
  topvars <- head(order(vars,decreasing=TRUE),20)
  for(r in seq_along(plottypearg)){
    plottype <- plottypearg[r]
    drawplot(filename=paste0(path.to.QC.results,"//mean vs var for all genes"),plottype)
    par(xpd=TRUE)
    par(mar=c(5,4,4,1))
    plot(vars~means,col=genecols,pch=gene.pch,xlab="Mean(log2 expression)",ylab="Variance(log2 expression)")
    legend("topleft",bty="n",inset=c(0,-0.16),col=legend.order[legend.keep],pch=c(1,16,16)[legend.keep],legend=c("Endogenous genes","Housekeepers used in normalization","Housekeepers - unused")[legend.keep])
    text(means[topvars],vars[topvars],colnames(alldata)[topvars], cex = 0.75)
    dev.off()
  }

  mean.var.normalized.df <- cbind(means,vars)
  colnames(mean.var.normalized.df) <- c("Average-normalized count", "Variance-normalized count")
  write.csv(mean.var.normalized.df, file = paste0(path.to.QC.results,"//QC module - Mean.Var.normalized.counts.csv"), row.names = TRUE)

  #### plot relationships btw covariates:
  print("Creating Overview plots")
  cat("LOG:Creating Overview plots",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Creating Overview plots</p>');", file=status.file,append=TRUE)

  n.covariates <- ncol(covariates)
  covariate.names <- colnames(covariates)

  # univariate histogram (continuous) or barplot (categorical) per covariate:
  for(i in seq_len(n.covariates))
  {
    for(r in seq_along(plottypearg)){
      plottype <- plottypearg[r]
      drawplot(filename=paste0(path.to.QC.results,"//univariate plot of covariates - ",covariate.names[i]),plottype)
      if(covariates.type[i]=="continuous")
      {
        hist(covariates[,i],xlab="",main=paste("Distribution of",covariate.names[i]))
      } else if(covariates.type[i]=="categorical")
      {
        barplot(table(covariates[,i]),main=paste("Distribution of",covariate.names[i]),las=2,border=annotcols2[[i]])
      }
      dev.off()
    }
  }

  # one plot per pair of covariates: scatterplot, boxplot or stacked barplot
  if(n.covariates>1)
  {
    for(i in seq_len(n.covariates-1))
    {
      for(j in (i+1):n.covariates)
      {
        type.i <- covariates.type[i]
        type.j <- covariates.type[j]
        for(r in seq_along(plottypearg)){
          plottype <- plottypearg[r]
          drawplot(filename=paste0(path.to.QC.results,"//pairs plot of covariates - ",covariate.names[i]," vs ",covariate.names[j]),plottype)
          # scatterplot between continuous covariates (drawn with the device's default margins):
          if(type.i=="continuous" && type.j=="continuous")
          {
            plot(covariates[,i],covariates[,j],xlab=covariate.names[i],ylab=covariate.names[j])
          }
          par(mar=c(5.1,6,4.1,2.1))
          # boxplot between cont and cat:
          if(type.i=="continuous" && type.j=="categorical")
          {
            par(mar=c(8,6,2,1))
            boxplot(covariates[,i]~covariates[,j],ylab=covariate.names[i],las=2,border=annotcols2[[j]],
                    main=paste(covariate.names[i],"vs.",covariate.names[j]))
          }
          if(type.j=="continuous" && type.i=="categorical")
          {
            par(mar=c(8,6,2,1))
            boxplot(covariates[,j]~covariates[,i],ylab=covariate.names[j],las=2,border=annotcols2[[i]],
                    main=paste(covariate.names[j],"vs.",covariate.names[i]))
          }
          # compare cat to cat:
          if(type.i=="categorical" && type.j=="categorical")
          {
            crosstab <- as.matrix(table(covariates[,c(i,j)]))
            # put the covariate with fewer levels on the stacking axis and
            # colour the bars by that covariate:
            annotcodecol <- i
            if(nrow(crosstab)>ncol(crosstab))
            {
              crosstab <- t(crosstab)
              annotcodecol <- j
            }
            barplot(crosstab,col=annotcols2[[annotcodecol]],las=2,main=paste(covariate.names[j],"vs.",covariate.names[i]))
          }
          dev.off()
        }
      }
    }
  }
  # NOTE(review): written relative to the working directory, unlike the other
  # QC outputs -- left unchanged in case a downstream step reads it from there.
  write.csv(covariates,file="QC module - sample annot.csv")

  ### multi.panel/multi.analyte heatmaps
  # column side colours for the normalized-data heatmaps, labelled like covariates:
  if(has.covariates)
  {
    rowcols <- annotcols
    cat("dimnames(covariates):",file=log,sep='\n',append=TRUE)
    suppressWarnings(write.table(colnames(covariates),file=log,sep='\t',append=TRUE))
    suppressWarnings(write.table(head(rownames(covariates),5),file=log,sep='\t',append=TRUE))
    cat("dim(rowcols)",file=log,sep='\n',append=TRUE)
    cat(dim(rowcols),file=log,sep='\n',append=TRUE)
    dimnames(rowcols) <- dimnames(covariates)
  }

  ### per-gene-set heatmap, PCA and PC loadings:
  if(length(prb.sets)>0)
  {
    # align the gene-set rows with the columns of normalized
    # (drop = FALSE keeps a single-column prb.sets a matrix):
    prb.sets <- prb.sets[match(colnames(normalized),rownames(prb.sets)),,drop=FALSE]
    n.sets <- ncol(prb.sets)
    set.hmcols <- colorRampPalette(c("cornflowerblue","black","orange"))(256)

    for(k in seq_len(n.sets))
    {
      print(paste("Creating Overview heatmaps for gene set ",k,"/",n.sets))
      cat(paste("LOG: Creating Overview heatmaps for gene set ",k,"/",n.sets),file=log,sep='\n\n',append=TRUE)
      cat(paste("document.write('<p>Creating Overview heatmaps for gene set ",k,"/",n.sets,"</p>');"), file=status.file,append=TRUE)

      set.label <- make.names(colnames(prb.sets)[k])
      # probes in the current gene set; which() skips rows that did not match
      # a probe in prb.sets (NA membership):
      set.probes <- rownames(prb.sets)[which(prb.sets[,k]==1)]

      # probes x samples matrix of the gene set (used by the no-covariate
      # heatmap fallback and by the PCA below); drop = FALSE keeps
      # single-probe sets a matrix:
      pathmatrix <- t(normalized[,set.probes,drop=FALSE])
      rownames(pathmatrix) <- probe.annot$probeLabel.Type[match(rownames(pathmatrix), rownames(probe.annot))]

      set.panels <- .runQC.buildPanels(normalizedData.list, probe.annot, keep.probes = set.probes)

      # the heatmap needs more than one gene in total:
      if(sum(vapply(set.panels$X,nrow,integer(1)))>1){
        for(r in seq_along(plottypearg)){
          plottype <- plottypearg[r]
          drawplot(filename=paste0(path.to.QC.results,"//heatmap of data - ",set.label),plottype,
                   width = 2, height = 2, heatmapres=TRUE)
          par(oma=c(1,4,1,1))
          par(mar=c(10,8,2,7))
          if(has.covariates){ heatmap.plus.multipanel(Xlist = set.panels$X, rowcolorlist = set.panels$rowcolors, ColSideColors = as.matrix(rowcols), margins=c(12,12), col=set.hmcols, cexCol = 0.85, cexRow = 0.75) }
          if(!has.covariates){ heatmap.plus.modified(pathmatrix,symm=FALSE,Colv=TRUE,Rowv=TRUE,col=set.hmcols,scale="row") }
          dev.off()
        }
      }

      if(nrow(pathmatrix)>1){
        # PCA of the samples using just the gene set's data:
        pc <- prcomp(t(pathmatrix))
        npc <- min(4,ncol(pc$x))
        # label each PC with its share of the total variance:
        pc.labels <- paste0("PC",seq_len(npc)," - ",round((pc$sdev^2)[seq_len(npc)]/sum(pc$sdev^2),2))
        # filled points for small studies, open circles otherwise:
        pch <- 1+15*(nrow(pc$x)<100)
        for(i in seq_len(n.covariates))
        {
          pointcols <- annotcols[,i]
          for(r in seq_along(plottypearg)){
            plottype <- plottypearg[r]
            drawplot(filename=paste0(path.to.QC.results,"//PCA colored by ",covariate.names[i]," - ",set.label),plottype)
            pairs.adaptive.cex(dat = pc$x[,seq_len(npc),drop=FALSE],
                               col=pointcols,
                               xaxt="n",
                               yaxt="n",
                               pch=pch,
                               labels = pc.labels)
            dev.off()
          }
        }

        # and draw the PC loadings, one barplot per PC stacked vertically:
        showpcs <- min(4,ncol(pc$rotation))
        for(r in seq_along(plottypearg)){
          plottype <- plottypearg[r]
          drawplot(filename=paste0(path.to.QC.results,"//PCA loadings - ",set.label),plottype,height=2)
          layout(matrix(c(1:showpcs)),heights = c(rep(1,showpcs-1),.8))
          par(mar = c(2,4,0,0))
          for(j in seq_len(showpcs))
          {
            # only the bottom panel carries the gene labels:
            if(j<showpcs){barplot(pc$rotation[,j],las=2,ylab=paste0("PC",j," loading"),names.arg="")}
            if(j==showpcs){par(mar = c(6,4,0,0));barplot(pc$rotation[,j],las=2,ylab=paste0("PC",j," loading"))}
          }
          dev.off()
        }
      }
    }
  }

  # plot the final heatmap of all genes x samples:
  norm.panels <- .runQC.buildPanels(normalizedData.list, probe.annot)
  all.hmcols <- colorRampPalette(c("cornflowerblue","black","orange"),bias=1.0)(256)
  for(r in seq_along(plottypearg)){
    plottype <- plottypearg[r]
    drawplot(filename=paste0(path.to.QC.results,"//heatmap of all genes X samples"),plottype,height=2,width=2,heatmapres=TRUE)
    par(oma=c(1,4,1,1))
    par(mar=c(10,8,2,7))
    if(has.covariates){ heatmap.plus.multipanel(Xlist = norm.panels$X, rowcolorlist = norm.panels$rowcolors, ColSideColors = as.matrix(rowcols), margins=c(12,12), col=all.hmcols, cexCol = 0.85, cexRow = 0.75) }
    if(!has.covariates){ heatmap.plus.modified(t(normalized),symm=FALSE,Colv=TRUE,Rowv=TRUE,labRow=NA,margins=c(8,8),col=all.hmcols,scale="row") }
    dev.off()
  }

  # (the sample-correlation heatmap that used to follow is no longer produced)

  ### PCA-based QC:
  pc <- prcomp(normalized)
  npc <- min(4,ncol(pc$x))
  x <- pc$x[,seq_len(npc),drop=FALSE]

  # outliers: anything > 3sds from the mean on one of the first PCs
  outliers <- matrix(FALSE,nrow(x),ncol(x))
  dimnames(outliers) <- list(rownames(normalized),paste("Outlier on PC",seq_len(ncol(x))))
  for(i in seq_len(ncol(x)))
  {
    outliers[abs(x[,i]-mean(x[,i]))>3*sd(x[,i]),i] <- TRUE
  }
  # output the outlying samples along with the PC(s) on which they're an outlier:
  flagged <- rowSums(outliers)>0
  if(any(flagged))
  {
    write.csv(outliers[flagged,,drop=FALSE],file=paste0(path.to.QC.results,"//outliers in first 4 principal components.csv"))
  }

  ## PC pairs plots colored by covariates (annotcols harmonizes colors across plots):
  if(n.covariates>0)
  {
    pc.labels <- paste0("PC",seq_len(npc)," - ",round((pc$sdev^2)[seq_len(npc)]/sum(pc$sdev^2),2))
    pch <- 1+15*(nrow(pc$x)<100)
    for(i in seq_len(n.covariates))
    {
      pointcols <- annotcols[,i]
      for(r in seq_along(plottypearg)){
        plottype <- plottypearg[r]
        drawplot(filename=paste0(path.to.QC.results,"//PCA colored by ",covariate.names[i]),plottype)
        pairs.adaptive.cex(x,col=pointcols,xaxt="n",yaxt="n",pch=pch,
                           labels = pc.labels)
        dev.off()
      }
    }
    write.csv(covariates,file=paste0(path.to.csvs,"//QC - covariates used in p-value histograms.csv"))

    ### p-value histograms of univariate analyses of ALL variables:
    # panel grid for the histograms (unchanged from the previous layout):
    if(n.covariates > 2)
    {
      nrows <- ceiling(n.covariates/2)
      ncols <- ceiling(n.covariates/2)
    } else {
      nrows <- 2
      ncols <- 1
    }

    for(r in seq_along(plottypearg)){
      print(paste("Creating Overview histograms ",r,"/",length(plottypearg)))
      cat(paste("LOG: Creating Overview histograms ",r,"/",length(plottypearg)),file=log,sep='\n\n',append=TRUE)
      cat(paste("document.write('<p>Creating Overview histograms ",r,"/",length(plottypearg),"</p>');"), file=status.file,append=TRUE)
      plottype <- plottypearg[r]

      drawplot(filename=paste0(path.to.QC.results,"//p-value histograms"),plottype,height=nrows*.8,width=ncols*.8)
      par(mfrow=c(nrows,ncols), las = 1)
      for(i in seq_len(n.covariates))
      {
        covariate.df <- as.data.frame(covariates[,i])
        pvals <- rep(NA_real_,ncol(normalized))
        for(j in seq_len(ncol(normalized)))
        {
          # lm() on a data frame regresses its first column on the rest:
          mod <- lm(cbind(normalized[,j],covariate.df))
          mod2 <- lm(normalized[,j]~1)
          # likelihood-ratio test of the covariate against the intercept-only model:
          LLR <- as.numeric(logLik(mod)-logLik(mod2))
          df <- length(mod$coef)-length(mod2$coef)
          pvals[j] <- pchisq(2*LLR,df,lower.tail=FALSE)
        }
        hist(pvals,main=covariate.names[i],breaks=(0:20)/20,xlim=c(0,1),cex.main=1.25,cex.lab=1.25,xlab="p-values")
      }
      dev.off()
    }
  }

  print("Creating Overview HTML infrastructure")
  cat("LOG: Creating Overview HTML infrastructure",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Creating Overview HTML infrastructure</p>');", file=status.file,append=TRUE)

  print("Finished Overview")
  cat("LOG:Finished Overview",file=log,sep='\n\n',append=TRUE)
  cat("document.write('<p>Finished Overview</p>');", file=status.file,append=TRUE)
}
